| Model | HF Main Model Name | HF Draft Model Name (speculative decoding) | Size | Format | API | GPU | GPU Mem | Run | Duration | Total | % | TIGER-Lab | Correct Random Guesses | Prompt tokens | Prompt tk/s | Completion tokens | Completion tk/s |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| claude-3-5-sonnet-20241022 | - | - | - | - | Anthropic | - | - | 1/2 | 31m 50s | 340/410 | 82.93% | ~= 82.44% | | 694458 | 362.78 | 97438 | 50.90 |
| claude-3-5-sonnet-20241022 | - | - | - | - | Anthropic | - | - | 2/2 | 31m 39s | 338/410 | 82.44% | == 82.44% | | 694458 | 364.82 | 97314 | 51.12 |
| gemini-1.5-pro-002 | - | - | - | - | Gemini | - | - | 1/2 | 31m 7s | 335/410 | 81.71% | > 71.22% | | 648675 | 346.82 | 78311 | 41.87 |
| gemini-1.5-pro-002 | - | - | - | - | Gemini | - | - | 2/2 | 30m 40s | 327/410 | 79.76% | > 71.22% | | 648675 | 351.73 | 76063 | 41.24 |
| QwQ-32B-Preview (8.0bpw EXL2, max_tokens=16384) | bartowski/QwQ-32B-Preview-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 38436MiB | 1/2 | 2h 3m 30s | 325/410 | 79.27% | | 0/2, 0.00% | 656716 | 88.58 | 327825 | 44.22 |
| QwQ-32B-Preview (8.0bpw EXL2, max_tokens=16384) | bartowski/QwQ-32B-Preview-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 38436MiB | 2/2 | 2h 3m 35s | 324/410 | 79.02% | | | 656716 | 88.52 | 343440 | 46.29 |
| Athene-V2-Chat (72B, 4.65bpw EXL2, Q4 cache) | wolfram/Athene-V2-Chat-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | RTX 6000 | 44496MiB | 1/2 | 2h 13m 5s | 326/410 | 79.51% | > 73.41% | | 656716 | 82.21 | 142256 | 17.81 |
| Athene-V2-Chat (72B, 4.65bpw EXL2, Q4 cache) | wolfram/Athene-V2-Chat-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | RTX 6000 | 44496MiB | 2/2 | 2h 14m 53s | 317/410 | 77.32% | > 73.41% | | 656716 | 81.11 | 143659 | 17.74 |
| Qwen2.5-72B-Instruct (4.65bpw EXL2, Q4 cache) | LoneStriker/Qwen2.5-72B-Instruct-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | 2x RTX 3090 | 41150MiB | 1/2 | 3h 7m 58s | 320/410 | 78.05% | > 74.88% | | 656716 | 58.21 | 139499 | 12.36 |
| Qwen2.5-72B-Instruct (4.65bpw EXL2, Q4 cache) | LoneStriker/Qwen2.5-72B-Instruct-4.65bpw-h6-exl2 | - | 72B | EXL2 | TabbyAPI | 2x RTX 3090 | 41150MiB | 2/2 | 3h 5m 19s | 319/410 | 77.80% | > 74.88% | | 656716 | 59.04 | 138135 | 12.42 |
| QwQ-32B-Preview (4.25bpw EXL2, max_tokens=16384) | bartowski/QwQ-32B-Preview-exl2_4_25 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 27636MiB | 1/2 | 1h 56m 8s | 319/410 | 77.80% | | 0/1, 0.00% | 656716 | 94.20 | 374973 | 53.79 |
| QwQ-32B-Preview (4.25bpw EXL2, max_tokens=16384) | bartowski/QwQ-32B-Preview-exl2_4_25 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 27636MiB | 2/2 | 1h 55m 44s | 318/410 | 77.56% | | | 656716 | 94.45 | 377638 | 54.31 |
| gpt-4o-2024-08-06 | - | - | - | - | OpenAI | - | - | 1/2 | 34m 54s | 320/410 | 78.05% | ~= 78.29% | 1/2, 50.00% | 631448 | 300.79 | 99103 | 47.21 |
| gpt-4o-2024-08-06 | - | - | - | - | OpenAI | - | - | 2/2 | 42m 41s | 316/410 | 77.07% | ~< 78.29% | 1/3, 33.33% | 631448 | 246.02 | 98466 | 38.36 |
| QwQ-32B-Preview (8.0bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 38528MiB | 1/4 | 1h 29m 49s | 324/410 | 79.02% | | 0/1, 0.00% | 656716 | 121.70 | 229008 | 42.44 |
| QwQ-32B-Preview (8.0bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 38528MiB | 2/4 | 1h 32m 30s | 314/410 | 76.59% | | 0/2, 0.00% | 656716 | 118.24 | 239161 | 43.06 |
| QwQ-32B-Preview (8.0bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_8_0 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 37000MiB | 3/4 | 2h 25m 24s | 308/410 | 75.12% | | 0/2, 0.00% | 656716 | 75.23 | 232208 | 26.60 |
| QwQ-32B-Preview (8.0bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_8_0 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 37000MiB | 4/4 | 2h 27m 27s | 305/410 | 74.39% | | 0/3, 0.00% | 656716 | 74.19 | 235650 | 26.62 |
| QwQ-32B-Preview-abliterated (4.5bpw EXL2, max_tokens=16384) | ibrahimkettaneh_QwQ-32B-Preview-abliterated-4.5bpw-h8-exl2 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 28556MiB | 1/2 | 2h 10m 53s | 310/410 | 75.61% | | | 656716 | 83.59 | 412512 | 52.51 |
| QwQ-32B-Preview-abliterated (4.5bpw EXL2, max_tokens=16384) | ibrahimkettaneh_QwQ-32B-Preview-abliterated-4.5bpw-h8-exl2 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 28556MiB | 2/2 | 2h 25m 29s | 310/410 | 75.61% | | | 656716 | 75.20 | 478590 | 54.80 |
| mistral-large-2407 (123B) | mistralai/Mistral-Large-Instruct-2407 | - | 123B | HF | Mistral | - | - | 1/2 | 40m 23s | 310/410 | 75.61% | > 70.24% | | 696798 | 287.13 | 79444 | 32.74 |
| mistral-large-2407 (123B) | mistralai/Mistral-Large-Instruct-2407 | - | 123B | HF | Mistral | - | - | 2/2 | 46m 55s | 308/410 | 75.12% | > 70.24% | 0/1, 0.00% | 696798 | 247.21 | 75971 | 26.95 |
| Llama-3.1-405B-Instruct-FP8 | meta-llama/Llama-3.1-405B-Instruct-FP8 | - | 405B | HF | IONOS | - | - | 1/2 | 2h 5m 28s | 311/410 | 75.85% | | | 648580 | 86.11 | 79191 | 10.51 |
| Llama-3.1-405B-Instruct-FP8 | meta-llama/Llama-3.1-405B-Instruct-FP8 | - | 405B | HF | IONOS | - | - | 2/2 | 2h 10m 19s | 307/410 | 74.88% | | | 648580 | 82.90 | 79648 | 10.18 |
| mistral-large-2411 (123B) | mistralai/Mistral-Large-Instruct-2411 | - | 123B | HF | Mistral | - | - | 1/2 | 41m 46s | 302/410 | 73.66% | | 1/3, 33.33% | 696798 | 277.70 | 82028 | 32.69 |
| mistral-large-2411 (123B) | mistralai/Mistral-Large-Instruct-2411 | - | 123B | HF | Mistral | - | - | 2/2 | 32m 47s | 300/410 | 73.17% | | 0/1, 0.00% | 696798 | 353.53 | 77998 | 39.57 |
| QwQ-32B-Preview (4.25bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_4_25 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 26198MiB | 1/4 | 1h 39m 49s | 308/410 | 75.12% | | 0/1, 0.00% | 656716 | 109.59 | 243552 | 40.64 |
| QwQ-32B-Preview (4.25bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_4_25 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 27750MiB | 2/4 | 1h 22m 12s | 304/410 | 74.15% | | | 656716 | 133.04 | 247314 | 50.10 |
| QwQ-32B-Preview (4.25bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_4_25 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 27750MiB | 3/4 | 1h 21m 39s | 296/410 | 72.20% | | | 656716 | 133.94 | 246020 | 50.18 |
| QwQ-32B-Preview (4.25bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_4_25 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 26198MiB | 4/4 | 1h 42m 33s | 294/410 | 71.71% | | | 656716 | 106.63 | 250222 | 40.63 |
| chatgpt-4o-latest @ 2024-11-18 | - | - | - | - | OpenAI | - | - | 1/2 | 28m 17s | 302/410 | 73.66% | < 78.29% | 2/4, 50.00% | 631448 | 371.33 | 146558 | 86.18 |
| chatgpt-4o-latest @ 2024-11-18 | - | - | - | - | OpenAI | - | - | 2/2 | 28m 31s | 298/410 | 72.68% | < 78.29% | 2/2, 100.00% | 631448 | 368.19 | 146782 | 85.59 |
| gpt-4o-2024-11-20 | - | - | - | - | OpenAI | - | - | 1/2 | 25m 35s | 296/410 | 72.20% | | 1/7, 14.29% | 631448 | 410.38 | 158694 | 103.14 |
| gpt-4o-2024-11-20 | - | - | - | - | OpenAI | - | - | 2/2 | 26m 10s | 294/410 | 71.71% | | 1/7, 14.29% | 631448 | 400.95 | 160378 | 101.84 |
| Llama-3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | - | 70B | HF | IONOS | - | - | 1/2 | 41m 12s | 291/410 | 70.98% | > 66.34% | 3/12, 25.00% | 648580 | 261.88 | 102559 | 41.41 |
| Llama-3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | - | 70B | HF | IONOS | - | - | 2/2 | 39m 48s | 287/410 | 70.00% | > 66.34% | 3/14, 21.43% | 648580 | 271.12 | 106644 | 44.58 |
| gemini-1.5-flash-002 | - | - | - | - | Gemini | - | - | 1/2 | 13m 19s | 288/410 | 70.24% | > 63.41% | 1/6, 16.67% | 648675 | 808.52 | 80535 | 100.38 |
| gemini-1.5-flash-002 | - | - | - | - | Gemini | - | - | 2/2 | 22m 30s | 285/410 | 69.51% | > 63.41% | 2/7, 28.57% | 648675 | 479.42 | 80221 | 59.29 |
| Llama-3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | - | 90B | HF | Azure | - | - | 1/2 | 33m 6s | 289/410 | 70.49% | | 4/7, 57.14% | 640380 | 321.96 | 88997 | 44.74 |
| Llama-3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | - | 90B | HF | Azure | - | - | 2/2 | 31m 31s | 281/410 | 68.54% | | 2/5, 40.00% | 640380 | 338.10 | 85381 | 45.08 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-3B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 45880MiB | 1/7 | 41m 59s | 289/410 | 70.49% | | | 656716 | 260.29 | 92126 | 36.51 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 40036MiB | 2/7 | 34m 24s | 286/410 | 69.76% | | | 656716 | 317.48 | 89487 | 43.26 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-3B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 45880MiB | 3/7 | 41m 27s | 283/410 | 69.02% | | 0/1, 0.00% | 656716 | 263.62 | 90349 | 36.27 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | bartowski/Qwen2.5-Coder-7B-Instruct-exl2_8_0 | 32B | EXL2 | TabbyAPI | RTX 6000 | 43688MiB | 4/7 | 42m 32s | 283/410 | 69.02% | | 0/1, 0.00% | 656716 | 256.77 | 90899 | 35.54 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | bartowski/Qwen2.5-Coder-7B-Instruct-exl2_8_0 | 32B | EXL2 | TabbyAPI | RTX 6000 | 43688MiB | 5/7 | 44m 34s | 282/410 | 68.78% | | 0/1, 0.00% | 656716 | 245.24 | 96470 | 36.03 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 38620MiB | 6/7 | 1h 2m 8s | 282/410 | 68.78% | | | 656716 | 175.98 | 92767 | 24.86 |
| Qwen2.5-Coder-32B-Instruct (8.0bpw EXL2) | bartowski/Qwen2.5-Coder-32B-Instruct-exl2_8_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 40036MiB | 7/7 | 34m 56s | 280/410 | 68.29% | | | 656716 | 312.66 | 91926 | 43.76 |
| QwQ-32B-Preview (3.0bpw EXL2, max_tokens=8192) | bartowski/QwQ-32B-Preview-exl2_3_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 22990MiB | 1/2 | 1h 15m 18s | 289/410 | 70.49% | | | 656716 | 145.23 | 269937 | 59.69 |
| QwQ-32B-Preview (3.0bpw EXL2, max_tokens=8192) | bartowski/QwQ-32B-Preview-exl2_3_0 | Qwen/Qwen2.5-Coder-0.5B-Instruct | 32B | EXL2 | TabbyAPI | RTX 6000 | 22990MiB | 2/2 | 1h 19m 50s | 274/410 | 66.83% | | 0/2, 0.00% | 656716 | 137.01 | 291818 | 60.88 |
| Mistral-Large-Instruct-2411 (123B, 3.0bpw EXL2) | MikeRoz/mistralai_Mistral-Large-Instruct-2411-3.0bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 47068MiB | 1/2 | 1h 26m 26s | 284/410 | 69.27% | | 1/3, 33.33% | 696798 | 134.23 | 79925 | 15.40 |
| Mistral-Large-Instruct-2411 (123B, 3.0bpw EXL2) | MikeRoz/mistralai_Mistral-Large-Instruct-2411-3.0bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 47068MiB | 2/2 | 1h 26m 10s | 275/410 | 67.07% | | 0/2, 0.00% | 696798 | 134.67 | 79778 | 15.42 |
| Mistral-Large-Instruct-2407 (123B, 2.75bpw EXL2) | turboderp/Mistral-Large-Instruct-2407-123B-exl2_2.75bpw | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 1/2 | 1h 8m 8s | 271/410 | 66.10% | < 70.24% | | 696798 | 170.29 | 66670 | 16.29 |
| Mistral-Large-Instruct-2407 (123B, 2.75bpw EXL2) | turboderp/Mistral-Large-Instruct-2407-123B-exl2_2.75bpw | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 2/2 | 1h 10m 38s | 268/410 | 65.37% | < 70.24% | 1/3, 33.33% | 696798 | 164.23 | 69182 | 16.31 |
| QwQ-32B-Preview (3.0bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_3_0 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 21574MiB | 1/2 | 1h 5m 30s | 268/410 | 65.37% | | 1/3, 33.33% | 656716 | 166.95 | 205218 | 52.17 |
| QwQ-32B-Preview (3.0bpw EXL2) | bartowski/QwQ-32B-Preview-exl2_3_0 | - | 32B | EXL2 | TabbyAPI | RTX 6000 | 21574MiB | 2/2 | 1h 8m 44s | 266/410 | 64.88% | | | 656716 | 159.10 | 215616 | 52.24 |
| Mistral-Large-Instruct-2411 (123B, 2.75bpw EXL2) | wolfram/Mistral-Large-Instruct-2411-2.75bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 1/2 | 1h 11m 50s | 267/410 | 65.12% | | 1/4, 25.00% | 696798 | 161.53 | 70538 | 16.35 |
| Mistral-Large-Instruct-2411 (123B, 2.75bpw EXL2) | wolfram/Mistral-Large-Instruct-2411-2.75bpw-h6-exl2 | - | 123B | EXL2 | TabbyAPI | RTX 6000 | 45096MiB | 2/2 | 1h 13m 50s | 243/410 | 59.27% | | 0/4, 0.00% | 696798 | 157.18 | 72718 | 16.40 |
| mistral-small-2409 (22B) | mistralai/Mistral-Small-Instruct-2409 | - | 22B | HF | Mistral | - | - | 1/2 | 25m 3s | 243/410 | 59.27% | > 53.66% | 1/4, 25.00% | 696798 | 462.38 | 73212 | 48.58 |
| mistral-small-2409 (22B) | mistralai/Mistral-Small-Instruct-2409 | - | 22B | HF | Mistral | - | - | 2/2 | 20m 45s | 239/410 | 58.29% | > 53.66% | 1/4, 25.00% | 696798 | 558.10 | 76017 | 60.89 |
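The % and tk/s columns are derived values: the percentage is Total correct over 410 questions, and the two tk/s figures are token counts divided by wall-clock run duration. A minimal sanity check, using the claude-3-5-sonnet-20241022 run 1/2 row from the table (durations are rounded to whole seconds, so throughput only matches the reported figures approximately):

```python
# Recompute the derived columns for the claude-3-5-sonnet-20241022 (run 1/2) row.
# Small deviations from the reported tk/s are expected because durations
# are rounded to whole seconds in the table.

def throughput(tokens: int, minutes: int, seconds: int) -> float:
    """Tokens per second over a run lasting `minutes` m `seconds` s."""
    return tokens / (minutes * 60 + seconds)

score_pct = round(340 / 410 * 100, 2)        # "%" column: 82.93
prompt_tps = throughput(694458, 31, 50)      # reported prompt tk/s: 362.78
completion_tps = throughput(97438, 31, 50)   # reported completion tk/s: 50.90
```

The recomputed throughputs land within well under 1% of the reported values, consistent with rounding of the duration.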